#Alexander F. Gazmararian
#afg2@princeton.edu
#January 19, 2024

#Load packages
library(tidyverse)
library(tidylog)
library(readxl)
library(here)

#load data
g <- read_xlsx(here("data", "netmetering", "net_metering2022.xlsx"))

#subset to relevant columns
g <- g[, c(1,2,3,5,38)]#column 38 is the amount of residential energy sold back in MWh
names(g) <- c("year", "month", "state", "utility", "mwh")
g <- g[-c(1:3),]
#subset to places with residential net solar metering
g <- g %>% mutate(mwh = as.numeric(mwh))
g <- g %>% filter(!is.na(mwh))
#aggregate to monthly level
g <- g %>%
  group_by(year,state,utility) %>%
  summarize(mwh=sum(mwh))

#load price by utility state
p <- read_xlsx(here("data", "netmetering", "table6.xlsx"))
p <- p[-c(1:2),]
names(p) <- c("utility","state","ownership","customers", "sales","revenue","avgprice")
#fix name
p[p$utility=="Mohave Electric Cooperative, I",]$utility <- "Mohave Electric Cooperative, Inc."
#check for match
d <- left_join(g,p, by = c("state","utility"))
#construct average price
d$kwh <- d$mwh * 1000
d$avgprice <- as.numeric(d$avgprice) / 100
#Aggregate
d <- d %>%
  group_by(state) %>%
  summarize(rev = sum(kwh * avgprice, na.rm = TRUE))

#Save processed data
saveRDS(d, here("data", "netmetering", "netmeterrev.rds"))
